###
### 5/23/07
### DJH
###
### code for submission to cluster:
### condor_submit_util -i congressOS033008.R  -f -n -1

library(e1071)
library(ReadMe)
### Build the ReadMe input object from the congress control file.
underg <- undergrad(control="/nfs/fs1/projects/poliblog/replication/congress/control10.txt")
### Working copy; `underg` itself is kept unmodified so that the recoding
### below can take its masks from the original labels.
underg1 <- underg

### Load the project-local undergrad2() and build a second object from the
### same control file.
### NOTE(review): undergrad2.R is not visible here -- presumably a modified
### version of undergrad(); confirm how its output differs.
source("/nfs/fs1/projects/poliblog/replication/undergrad2.R")
undergff <- undergrad2(control="/nfs/fs1/projects/poliblog/replication/congress/control10.txt")

### recode TRUTH so that it is based on partisanship: original category 3
### becomes 1 and original category 2 becomes 4.  Positions are looked up in
### the untouched labels from `underg`, not in the copy being modified.
orig_truth <- underg$trainingset$TRUTH
underg1$trainingset$TRUTH[which(orig_truth == 3)] <- 1
underg1$trainingset$TRUTH[which(orig_truth == 2)] <- 4

#### extract page numbers
### Each FILENAME is split on "_" and the first four characters of the third
### piece are kept as the page-number string.  The whole column is handled in
### one vectorized pass, so the result always has exactly one entry per
### training-set row (the earlier loop was hard-coded to 3838 rows and grew
### the vector with c() on every iteration).  A filename with fewer than
### three "_"-separated pieces yields NA.
pgnumvec <- vapply(
  strsplit(as.character(underg1$trainingset$FILENAME), "_"),
  function(pieces) substr(pieces[3], start = 1, stop = 4),
  character(1),
  USE.NAMES = FALSE
)
### numeric page numbers; strings that are not numbers become NA (with a
### coercion warning)
pgnumvecn <- as.numeric(pgnumvec)

#### even vs. odd
### 1 where the page number is even, 0 where it is odd; NA page numbers
### stay NA.  A number is even exactly when half of it is a whole number.
evenpgs <- as.numeric(pgnumvecn / 2 == floor(pgnumvecn / 2))

### split the hand-coded documents by page parity: rows from even pages form
### the new training set and rows from odd pages form the new test set.  Both
### objects are split with the same masks, which were derived from
### underg1's filenames.
even_rows <- evenpgs == 1
odd_rows <- evenpgs == 0

### object built by undergrad()
underg2 <- underg1
underg2$trainingset <- underg1$trainingset[even_rows, ]
underg2$testset <- underg1$trainingset[odd_rows, ]

### object built by undergrad2()
undergff2 <- undergff
undergff2$trainingset <- undergff$trainingset[even_rows, ]
undergff2$testset <- undergff$trainingset[odd_rows, ]

### run readme
#rout <- readme(underg2,n.subset=1000,features=11,boot.se=T,printit=F)
#save(rout,file="/nfs/fs1/projects/poliblog/replication/congress/congressOSOUTdd.Rdata")
